package org.dataalgorithms.chap05.mapreduce;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
//
import org.apache.log4j.Logger;

/**
 * RelativeFrequencyDriver is the driver class for the MapReduce job that
 * computes the relative frequency of words.
 *
 * <p>Expected (non-generic) arguments: {@code <window> <input> <output>}.
 * Hadoop generic options (for example {@code -D key=value}) may precede them;
 * they are consumed by {@link ToolRunner} and applied to the job configuration.
 *
 * @author Mahmoud Parsian
 *
 */
public class RelativeFrequencyDriver
        extends Configured implements Tool {

    private static final Logger THE_LOGGER = Logger.getLogger(RelativeFrequencyDriver.class);

    /**
     * Dispatches command-line arguments to the tool by the ToolRunner.
     *
     * <p>Argument validation is done in {@link #run(String[])}, i.e. after
     * ToolRunner has removed the generic Hadoop options, so that invocations
     * using generic options are not rejected for having "too many" arguments.
     *
     * @param args generic Hadoop options followed by {@code <window> <input> <output>}
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(new RelativeFrequencyDriver(), args);
        System.exit(status);
    }

    /**
     * Configures and runs the relative-frequency job and waits for it to finish.
     *
     * @param args {@code <window> <input> <output>}: the neighbor window size
     *             (an integer), the input path, and the output path
     * @return 0 if the job completed successfully, 1 if the job failed,
     *         -1 if the arguments are malformed
     * @throws NumberFormatException if {@code <window>} is not a valid integer
     * @throws Exception if job setup, submission, or monitoring fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length != 3) {
            THE_LOGGER.warn("usage: <window> <input> <output>");
            return -1;
        }

        int neighborWindow = Integer.parseInt(args[0]);
        Path inputPath = new Path(args[1]);
        Path outputPath = new Path(args[2]);

        // Use the configuration populated by ToolRunner so that generic options
        // given on the command line take effect; fall back to a fresh one when
        // run() is invoked without a configuration having been set.
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }

        Job job = Job.getInstance(conf, "RelativeFrequencyDriver");
        job.setJarByClass(RelativeFrequencyDriver.class);

        // Delete the output directory if it exists already. The filesystem is
        // resolved from the path itself so that a fully-qualified output URI on
        // a non-default filesystem is handled correctly.
        FileSystem outputFileSystem = outputPath.getFileSystem(conf);
        outputFileSystem.delete(outputPath, true);

        // Publish the window size through the job configuration.
        job.getConfiguration().setInt("neighbor.window", neighborWindow);

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // (key,value) generated by map()
        job.setMapOutputKeyClass(PairOfWords.class);
        job.setMapOutputValueClass(IntWritable.class);

        // (key,value) generated by reduce()
        job.setOutputKeyClass(PairOfWords.class);
        job.setOutputValueClass(DoubleWritable.class);

        job.setMapperClass(RelativeFrequencyMapper.class);
        job.setReducerClass(RelativeFrequencyReducer.class);
        job.setCombinerClass(RelativeFrequencyCombiner.class);
        job.setPartitionerClass(OrderInversionPartitioner.class);
        job.setNumReduceTasks(3);

        long startTime = System.currentTimeMillis();
        boolean succeeded = job.waitForCompletion(true);
        THE_LOGGER.info("Job Finished in milliseconds: " + (System.currentTimeMillis() - startTime));

        // Propagate the job outcome so that the process exit status reflects failure.
        return succeeded ? 0 : 1;
    }

}
